rm(list=ls())
library(epicalc)
library(sandwich)
zap()
library(foreign)

source("C:/Users/Martin Steinwand/Documents/R/MyRoutines/LegendLargebox.R")

setwd("C:/Users/Martin Steinwand/Documents/Project Lead Donor/Data")
alldat <- read.dta("RecipientYears.4.2.dta")
n <- nrow(alldat)
attach(alldat)


############################################################
## Building a typology 
summary(topshare[lead5of9r2==1&  ldtop5of9r2==1])
summary(diffshare1_2[lead5of9r2==1&  ldtop5of9r2==1])
summary(hhi[lead5of9r2==1])

#dummies for median of categories 
alldat$ld_topshare <- as.numeric(lead5of9r2==1&  ldtop5of9r2==1& topshare >median(topshare[lead5of9r2==1&  ldtop5of9r2==1]))
alldat$ld_topshare[lead5of9r2==0] <- NA
alldat$ld_diffshare <- as.numeric(lead5of9r2==1&  ldtop5of9r2==1& diffshare1_2 >median(diffshare1_2[lead5of9r2==1&  ldtop5of9r2==1]))
alldat$ld_diffshare[lead5of9r2==0] <- NA
alldat$ld_hhi <- as.numeric(lead5of9r2==1 & hhi > median(hhi[lead5of9r2==1]))
alldat$ld_hhi[lead5of9r2==0] <- NA

attributes(alldat)$var.labels[ncol(alldat)-2]<- "Aid share above median of lead donor aid shares"
attributes(alldat)$var.labels[ncol(alldat)-1]<- "Difference btw top and #2 aid shares above median of all lead donors"
attributes(alldat)$var.labels[ncol(alldat)]<- "HHI above median lead donor HHI"

table(alldat$ld_topshare, alldat$ld_diffshare)
table(alldat$ld_diffshare, alldat$ld_hhi)
table(alldat$ld_topshare,  alldat$ld_hhi)

table(alldat[alldat$ld_topshare==0,]$ld_diffshare, alldat[alldat$ld_topshare==0,]$ld_hhi)
table(alldat[alldat$ld_topshare==1,]$ld_diffshare, alldat[alldat$ld_topshare==1,]$ld_hhi)

#combine all three
alldat$ldstrong <- as.numeric(alldat$ld_hhi==1 & alldat$ld_diffshare ==1& alldat$ld_topshare ==1)
alldat$ldstrong[lead5of9r2==0] <- NA
attributes(alldat)$var.labels[ncol(alldat)] <- "Strong donor (topshare, #2-#1 contrib, hhi)"

#write.dta(alldat, "RecipientYears.4.2.dta", version=11)



####################################
#HHI and lead donorship
#Mean HHI is higher with lead donor
t.test(hhi[lead5of9r2==1], hhi[lead5of9r2==0&ODAdis_f>0])

#Show distribution of HHI for Lead donor & non-lead donor cases
den0 <- density(alldat[alldat$lead5of9r2==0&alldat$ODAdis_f>0,]$hhi,na.rm=TRUE)
den1 <- density(alldat[alldat$lead5of9r2==1,]$hhi,na.rm=TRUE)
den2 <- density(alldat[alldat$ldstrong==1,]$hhi,na.rm=TRUE)

par(mar=c(5, 4, 1, 2), cex=1)
plot(c(min(den0$x, den1$x), max(den0$x, den1$x)), c(min(den0$y, den1$y), max(den0$y, den1$y)), 
        type = "n", 
#        axes=F,
        xlab = "HHI, Disbursed Aid", 
        ylab = "Density", #turn off axes and labels. 
        #xlim=c(low, hgh), c(1,length(modname)), 
        xaxs = "r",
        cex.axis=1.2, cex.lab=1.2
) 
lines(den0$x, den0$y, lwd=1.8, lty=5)
lines(den1$x, den1$y, lwd=1.8, lty=3)
lines(den2$x, den1$y, lwd=1.8, lty=1)

legend(0.55,2.5, legend=c("No Lead Donor", "Lead Donor", "Strong Lead Donor"), lty=c(5,2,1), bty="n", cex=1.2)

#save plot
postscript(file="C:/Users/Martin Steinwand/Documents/Project Lead Donor/Analysis/FinalPaper/HHIbyLead.eps",onefile=FALSE,
 horizontal=FALSE, paper="letter", height=6.1, width=6.5)
par(mar=c(5, 4, 1, 2), cex=1)
plot(c(min(den0$x, den1$x), max(den0$x, den1$x)), c(min(den0$y, den1$y), max(den0$y, den1$y)), 
        type = "n", 
#        axes=F,
        xlab = "Donor concentration: HHI of disbursed aid", 
        ylab = "Density", #turn off axes and labels. 
        #xlim=c(low, hgh), c(1,length(modname)), 
        xaxs = "r",
        cex.axis=1.2, cex.lab=1.2
) 
lines(den0$x, den0$y, lwd=1.8, lty=5)
lines(den1$x, den1$y, lwd=1.8, lty=3)
lines(den2$x, den1$y, lwd=1.8, lty=1)

legend(0.55,2.5, legend=c("No Lead Donor", "Lead Donor", "Strong Lead Donor"), lty=c(5,3,1), bty="n", cex=1.2)
dev.off()


###################################################################################
## Lead donorship and distance in aid share of top and second largest contributor 

#Difference btw #1 and #2 top contributor is higher with lead donor
t.test(diffshare1_2[lead5of9r2==1&  ldtop5of9r2==1], diffshare1_2[lead5of9r2==0&ODAdis_f>0])

den0 <- density(alldat[alldat$lead5of9r2==0&alldat$ODAdis_f>0,]$diffshare1_2,na.rm=TRUE)
den1 <- density(alldat[alldat$lead5of9r2==1&  ldtop5of9r2==1,]$diffshare1_2,na.rm=TRUE)

par(mar=c(5, 4, 1, 2), cex=1)
plot(c(min(den0$x, den1$x), max(den0$x, den1$x)), c(min(den0$y, den1$y), max(den0$y, den1$y)), 
        type = "n", 
#        axes=F,
        xlab = "Difference in aid share, #1 & #2 contributor", 
        ylab = "Density", #turn off axes and labels. 
        #xlim=c(low, hgh), c(1,length(modname)), 
        xaxs = "r",
        cex.axis=1.2, cex.lab=1.2
) 
lines(den0$x, den0$y, lwd=1.8, lty=5)
lines(den1$x, den1$y, lwd=1.8, lty=1)
legend(0.55,2.5, legend=c("No Lead Donor", "With Lead Donor"), lty=c(5,1), bty="n", cex=1.2)


###################################################################################
## Lead donorship and aid share of top contributor 
t.test(topshare[lead5of9r2==1&  ldtop5of9r2==1], topshare[lead5of9r2==0&ODAdis_f>0])

den0 <- density(alldat[alldat$lead5of9r2==0&alldat$ODAdis_f>0,]$topshare,na.rm=TRUE)
den1 <- density(alldat[alldat$lead5of9r2==1&  ldtop5of9r2==1,]$topshare,na.rm=TRUE)

par(mar=c(5, 4, 1, 2), cex=1)
plot(c(min(den0$x, den1$x), max(den0$x, den1$x)), c(min(den0$y, den1$y), max(den0$y, den1$y)), 
        type = "n", 
#        axes=F,
        xlab = "Aid share of highest contributor", 
        ylab = "Density", #turn off axes and labels. 
        #xlim=c(low, hgh), c(1,length(modname)), 
        xaxs = "r",
        cex.axis=1.2, cex.lab=1.2
) 
lines(den0$x, den0$y, lwd=1.8, lty=5)
lines(den1$x, den1$y, lwd=1.8, lty=1)
legend(0.55,2.5, legend=c("No Lead Donor", "With Lead Donor"), lty=c(5,1), bty="n", cex=1.2)




###########################################################################
## descriptives by donor and/or region: strong versus weak


#weak versus strong
summary(alldat[leadid5of9r2==2,]$ldstrong)    #us
summary(alldat[leadid5of9r2==220,]$ldstrong)    #france
summary(alldat[leadid5of9r2==211,]$ldstrong)    #belgium
summary(alldat[leadid5of9r2==210,]$ldstrong)    #netherlands
summary(alldat[leadid5of9r2==255,]$ldstrong)    #germany
summary(alldat[leadid5of9r2==200,]$ldstrong)    #uk
summary(alldat[leadid5of9r2==380,]$ldstrong)    #sweden
summary(alldat[leadid5of9r2==385,]$ldstrong)    #norway

#by region
ldbyreg <- function(id, silent=TRUE){
    nid <- function(reg, id){
        nrow(alldat[leadid5of9r2==id&as.numeric(region)==reg&ldstrong==1
                &lead5of9r2==1,])   
    }
    ntot <- function(reg, id){
        nrow(alldat[as.numeric(region)==reg & ldstrong==1
                &lead5of9r2==1,])   
    }
    nid.w <- function(reg, id){
        nrow(alldat[leadid5of9r2==id&as.numeric(region)==reg
                &lead5of9r2==1,])   - nrow(alldat[leadid5of9r2==id&as.numeric(region)==reg
                &ldstrong==1&lead5of9r2==1,])                   
    }
    ntot.w <- function(reg, id){
        nrow(alldat[as.numeric(region)==reg&lead5of9r2==1,])   -
                nrow(alldat[as.numeric(region)==reg & ldstrong==1
                &lead5of9r2==1,])   
    }

    sout <- sapply(1:(length(summary(region))-1), nid.w, id=id)
    sout <- rbind(sout, sapply(1:(length(summary(region))-1), ntot.w, id=id))
    sout <- rbind(sout, sout[1,]/sout[2,])
    sout <- rbind(sout, sapply(1:(length(summary(region))-1), nid, id=id))
    sout <- rbind(sout, sapply(1:(length(summary(region))-1), ntot, id=id))
    sout <- rbind(sout, sout[4,]/sout[5,])
    colnames(sout) <- names(summary(region))[1:(length(summary(region))-1)]
    rownames(sout) <- c("weak ld", "weak ld total", "ratio weak", "strong ld", "strong ld total", 
                    "ratio strong")
    if(!silent) print(sout)
    return(sout)
}

dons <- c(2, 20, 200, 210, 220, 255, 740, 900)
rg <- sapply(dons, ldbyreg)
bars <- rg[c(6, 12, 18, 24, 30, 36, 42),]
rownames(bars) <- names(summary(region))[1:(length(summary(region))-1)]
dname <- c("USA", "Canada", "UK","Netherl.", "France", 
                    "Germany", "Japan", "Australia")
colnames(bars) <- dname
plot(c(0,8),
    c(0,1),          
    ylab="Share of Strong Lead Donors",            
    type = "n")                             

postscript(file="C:/Users/Martin Steinwand/Documents/Project Lead Donor/Analysis/FinalPaper/StrongLdRegion.eps",
        onefile=FALSE, horizontal=FALSE, paper="letter", height=4, width=8)
barplot(t(bars), width=rep(.8,8),density=c(5, 5, 5, 5, 20, 20, 20, 20), 
        angle=c(45, 0, -45, 90, 45, 0, -45, 90), ylab="Share of Strong Lead Donors", 
        ylim=c(0,1), cex.names=.8, cex.lab=1.2, xlim=c(0,8), names.arg=c("Central \nAmerica", 
        "South \nAmerica", "Europe", "Africa", "Middle \nEast", "Asia", "Oceania"))
legend.lb(6.7,1, legend=dname, density=c(5, 5, 5, 5, 20, 20, 20, 20), 
        angle=c(45, 0, -45, 90, 45, 0, -45, 90), fill=TRUE, bty="n")
dev.off()


############################################
## donorship over time

ny <- function(y){
        nrow(alldat[ldstrong==1 &year==y &is.na(ldstrong)==FALSE,])
}
ny.w <- function(y){
        nrow(alldat[lead5of9r2==1 &year==y & is.na(lead5of9r2)==FALSE & !ldstrong==1,])       
}
nytot <- function(y){
        nrow(alldat[year==y,])   
}

avghhi <- function(y){
        mean(hhi[year==y])
}

years <- as.integer(names(table(year)))
ldsy <- sapply(years, ny)/
        sapply(years, nytot)
names(ldsy) <- names(table(year))
ldwy <- sapply(years, ny.w)/
        sapply(years, nytot)
names(ldwy) <- names(table(year))
hhitrend <- sapply(years, avghhi)
names(hhitrend) <- names(table(year))

plot(c(min(years),max(years)),
    c(0,.6),          
    xlab = "Year",      
    ylab = "Share of Recipient Countries",            
    type = "n")                             
lines(years, ldsy,lty = 1, lwd=2)   
lines(years, ldwy,lty = 5, lwd=2)   
lines(years, hhitrend, lty=4, lwd=2)
lines(c(1974,1974), c(-0.5,.65), lty=3, lwd=2)
lines(c(2006,2006), c(-0.5,.65), lty=3, lwd=2)
legend(1970, .6, lty=c(1,5, 4), legend=c("Strong Lead Donorship", "Weak Lead Donorship",
                "Donor Concentration, HHI"))

postscript(file="C:/Users/Martin Steinwand/Documents/Project Lead Donor/Analysis/FinalPaper/LdYears.eps",
        onefile=FALSE, horizontal=FALSE, paper="letter", height=6, width=6)
plot(c(min(years),max(years)),
    c(0,.6),          
    xlab = "Year",      
    ylab = "Share of Recipient Countries",            
    type = "n")                             

lines(years, ldsy,lty = 1, lwd=1.5)   
lines(years, ldwy,lty = 5, lwd=1.5)   
lines(years, hhitrend, lty=4, lwd=2)
lines(c(1974,1974), c(-0.5,.65), lty=3, lwd=2)
lines(c(2006,2006), c(-0.5,.65), lty=3, lwd=2)
legend(1974.5, .6, lty=c(1,5, 4), legend=c("Strong Lead Donorship", "Weak Lead Donorship",
                "Donor Concentration, HHI"))
dev.off()


#Check for co-integration of HHI and lead donor variable

#implement Engle-Granger test http://en.wikipedia.org/wiki/Cointegration#Test

#regress the two time series on each other
mfit <- lm(ldsy~hhitrend-1)
#check for stationarity of residuals using augmented dickey fuller test
library(tseries)
adf.test(mfit$residuals)
#cointegration implies stationarity

mfit2 <- lm(ldwy~hhitrend-1)
adf.test(mfit2$residuals)

mfit3 <- lm(ldwy~ldsy-1)
adf.test(mfit3$residuals)
